-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[libcxx] Unwrap iterators in __find_segment #161274
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-libcxx Author: None (lbonn) Changes: The segmented-iterator-optimized implementation of find now unwraps iterators when processing each segment. As a result, it is able to take better advantage of some find specializations: calling memchr/wmemchr for vector<vector<{char,int}>>. Full diff: https://github.com/llvm/llvm-project/pull/161274.diff 2 Files Affected:
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 5f32ae8fc9524..91c6a4e744a71 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -228,7 +228,8 @@ struct __find_segment {
template <class _InputIterator, class _Proj>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
- return std::__find(__first, __last, __value_, __proj);
+ return std::__rewrap_iter(
+ __first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value_, __proj));
}
};
diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
index afea31fb59e95..7780b5a92a6c4 100644
--- a/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/nonmodifying/find.bench.cpp
@@ -12,6 +12,7 @@
#include <cstddef>
#include <deque>
#include <list>
+#include <ranges>
#include <string>
#include <vector>
@@ -83,6 +84,20 @@ int main(int argc, char** argv) {
bm.template operator()<std::list<int>>("rng::find_if_not(list<int>) (" + comment + ")", ranges_find_if_not);
};
+ auto register_nested_container_benchmarks = [&](auto bm, std::string comment) {
+ // ranges_find
+ bm.template operator()<std::vector<std::vector<char>>>(
+ "rng::find(join_view(vector<vector<char>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::vector<std::vector<int>>>(
+ "rng::find(join_view(vector<vector<int>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::list<std::vector<int>>>(
+ "rng::find(join_view(list<vector<int>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::vector<std::list<int>>>(
+ "rng::find(join_view(vector<list<int>>)) (" + comment + ")", ranges_find);
+ bm.template operator()<std::deque<std::deque<int>>>(
+ "rng::find(join_view(deque<deque<int>>)) (" + comment + ")", ranges_find);
+ };
+
// Benchmark {std,ranges}::{find,find_if,find_if_not}(normal container) where we
// bail out after 25% of elements
{
@@ -142,6 +157,44 @@ int main(int argc, char** argv) {
register_benchmarks(bm, "process all");
}
+ // Benchmark {std,ranges}::{find,find_if,find_if_not}(join(normal container)) where we process the whole sequence
+ {
+ auto bm = []<class Container>(std::string name, auto find) {
+ benchmark::RegisterBenchmark(
+ name,
+ [find](auto& st) {
+ std::size_t const size = st.range(0);
+ std::size_t const seg_size = 256;
+ std::size_t const segments = (size + seg_size - 1) / seg_size;
+ using C1 = typename Container::value_type;
+ using ValueType = typename C1::value_type;
+ ValueType x = Generate<ValueType>::random();
+ ValueType y = random_different_from({x});
+ Container c(segments);
+ auto n = size;
+ for (auto it = c.begin(); it != c.end(); it++) {
+ it->resize(std::min(seg_size, n), x);
+ n -= it->size();
+ }
+
+ auto view = c | std::views::join;
+
+ for ([[maybe_unused]] auto _ : st) {
+ benchmark::DoNotOptimize(c);
+ benchmark::DoNotOptimize(y);
+ auto result = find(view.begin(), view.end(), y);
+ benchmark::DoNotOptimize(result);
+ }
+ })
+ ->Arg(8)
+ ->Arg(50) // non power-of-two
+ ->Arg(1024)
+ ->Arg(8192)
+ ->Arg(1 << 15);
+ };
+ register_nested_container_benchmarks(bm, "process all");
+ }
+
// Benchmark {std,ranges}::{find,find_if,find_if_not}(vector<bool>) where we process the whole sequence
{
auto bm = [](std::string name, auto find) {
|
The segmented-iterator-optimized implementation of find now unwraps
iterators when processing each segment.
As a result, it is able to take better advantage of some find
specializations: calling memchr/wmemchr for vector<vector<{char,int}>>.
```
Benchmark Baseline Candidate Difference % Difference
-------------------------------------------------------------- ---------- ----------- ------------ --------------
rng::find(join_view(deque<deque<int>>))_(process_all)/1024 71.13 61.19 -9.94 -13.97
rng::find(join_view(deque<deque<int>>))_(process_all)/32768 2359.19 2237.02 -122.17 -5.18
rng::find(join_view(deque<deque<int>>))_(process_all)/50 16.88 17.59 0.71 4.20
rng::find(join_view(deque<deque<int>>))_(process_all)/8 15.59 16.10 0.51 3.27
rng::find(join_view(deque<deque<int>>))_(process_all)/8192 647.01 532.75 -114.26 -17.66
rng::find(join_view(list<vector<int>>))_(process_all)/1024 689.76 680.74 -9.02 -1.31
rng::find(join_view(list<vector<int>>))_(process_all)/32768 22284.95 21500.26 -784.69 -3.52
rng::find(join_view(list<vector<int>>))_(process_all)/50 32.77 32.12 -0.65 -1.98
rng::find(join_view(list<vector<int>>))_(process_all)/8 6.11 5.92 -0.19 -3.11
rng::find(join_view(list<vector<int>>))_(process_all)/8192 5527.88 5373.43 -154.45 -2.79
rng::find(join_view(vector<list<int>>))_(process_all)/1024 1305.59 1264.04 -41.55 -3.18
rng::find(join_view(vector<list<int>>))_(process_all)/32768 42840.88 43322.64 481.76 1.12
rng::find(join_view(vector<list<int>>))_(process_all)/50 57.52 62.35 4.82 8.38
rng::find(join_view(vector<list<int>>))_(process_all)/8 6.06 5.98 -0.07 -1.18
rng::find(join_view(vector<list<int>>))_(process_all)/8192 20700.53 21431.66 731.12 3.53
rng::find(join_view(vector<vector<char>>))_(process_all)/1024 310.64 18.34 -292.30 -94.09
rng::find(join_view(vector<vector<char>>))_(process_all)/32768 9424.96 531.99 -8892.97 -94.36
rng::find(join_view(vector<vector<char>>))_(process_all)/50 18.58 3.25 -15.32 -82.49
rng::find(join_view(vector<vector<char>>))_(process_all)/8 4.81 2.98 -1.84 -38.13
rng::find(join_view(vector<vector<char>>))_(process_all)/8192 2437.50 126.88 -2310.62 -94.79
rng::find(join_view(vector<vector<int>>))_(process_all)/1024 297.10 41.70 -255.39 -85.96
rng::find(join_view(vector<vector<int>>))_(process_all)/32768 9662.42 1822.05 -7840.36 -81.14
rng::find(join_view(vector<vector<int>>))_(process_all)/50 22.29 5.10 -17.19 -77.11
rng::find(join_view(vector<vector<int>>))_(process_all)/8 3.73 3.13 -0.60 -16.05
rng::find(join_view(vector<vector<int>>))_(process_all)/8192 2399.68 356.10 -2043.58 -85.16
```
d426559 to
b61c007
Compare
|
Small ping :) @philnik777 maybe you can help land this, I do not have commit access. |
|
@lbonn Please ping me again when the CI is done. |
|
@philnik777 it is now green. |
The segmented-iterator-optimized implementation of find now unwraps iterators when processing each segment.
As a result, it is able to take better advantage of some find
specializations: calling memchr/wmemchr for vector<vector<{char,int}>>.